import re
import pandas


import os.path


# Folder that is scanned for input CSV files (note the trailing slash).
csv_folder_path = '/Users/zhubin/Desktop/宁波51/'

# Salary-text patterns.  万 is a unit of 10,000, 千 a unit of 1,000; 月/年/天
# mean per month / per year / per day, and 以下 marks an "at most" value that
# carries no lower bound.  Raw strings are used so that ``\d`` reaches the
# regex engine without relying on Python passing unknown escapes through.
m1 = r'^[\d]+[.]?[\d]?-[\d]+[.]?[\d]?万/月$'  # range, 10k units, per month
m2 = r'^[\d]+[.]?[\d]?-[\d]+[.]?[\d]?千/月$'  # range, 1k units, per month
m3 = r'^[\d]+[.]?[\d]?-[\d]+[.]?[\d]?万/年$'  # range, 10k units, per year
# Disabled: range, 1k units, per year.
# m4 = r'^[\d]+[.]?[\d]?-[\d]+[.]?[\d]?千/年$'
m5 = r'^[\d]+[.]?[\d]?千以下/月$'  # upper bound only, 1k units, per month
m6 = r'^[\d]+[.]?[\d]?万以下/年$'  # upper bound only, 10k units, per year
m7 = r'^[\d]+元/天$'  # single integer amount per day

# NOTE: process_data() branches on the *position* of the matching pattern in
# this list, so the order must not change and new patterns cannot simply be
# inserted without updating that function.
ms = [m1, m2, m3, m5, m6, m7]


def _monthly_salary_range(salary_text):
    """Translate one salary description into a monthly ``(lower, upper)`` pair.

    The text is tried against the module-level ``ms`` patterns in order; the
    position of the first matching pattern selects the conversion:

    * positions 0-2 are ``low-high`` ranges, later positions carry a single
      number that becomes the upper bound (lower bound 0), except position 5
      (daily pay) where the same number is used for both bounds;
    * positions 1 and 3 are expressed in thousands, 0, 2 and 4 in ten
      thousands;
    * position 5 is a daily amount (scaled by 30 days), positions 2 and 4 are
      yearly amounts (divided by 12 months).

    Returns ``None`` when ``salary_text`` is not a string or matches no
    pattern.
    """
    if not isinstance(salary_text, str):
        # Empty CSV cells are loaded as float NaN, which re.match() rejects.
        return None

    for position, pattern in enumerate(ms):
        if not re.match(pattern, salary_text):
            continue

        amounts = [float(token)
                   for token in re.findall(r'[\d]+[.]?[\d]?', salary_text)]
        if position < 3:
            lower, upper = amounts[0], amounts[1]
        elif position == 5:
            lower = upper = amounts[0]
        else:
            lower, upper = 0.0, amounts[0]

        # Apply the unit: thousands or ten thousands.
        if position in (1, 3):
            unit = 1000
        elif position in (0, 2, 4):
            unit = 10000
        else:
            unit = 1
        lower *= unit
        upper *= unit

        # Convert to a monthly figure.
        if position == 5:
            lower *= 30
            upper *= 30
        elif position in (2, 4):
            lower /= 12
            upper /= 12
        return lower, upper

    return None


def process_data(path: str) -> None:
    """Normalise the salary column of one CSV file and write the result.

    Reads ``csv_folder_path/path``, parses every value of the ``薪资范围``
    column into monthly lower/upper bounds stored in the new columns
    ``薪资下限`` and ``薪资上限``, drops the rows whose salary text could not
    be parsed, and writes the remaining rows to
    ``csv_folder_path/处理后数据/path`` (UTF-8 with BOM, no index column).
    Each unparsable row is reported once on stdout as ``<row number> <text>``.

    Args:
        path: File name of the CSV, relative to ``csv_folder_path``.

    Raises:
        KeyError: If the file has no ``薪资范围`` column.
    """
    table = pandas.read_csv(os.path.join(csv_folder_path, path))

    # Collect the bounds in plain lists and assign whole columns afterwards;
    # writing single cells through chained indexing is not reliable in pandas.
    lower_limits = []
    upper_limits = []
    for row_number, salary_text in enumerate(table['薪资范围']):
        bounds = _monthly_salary_range(salary_text)
        if bounds is None:
            print('%d %s' % (row_number, salary_text))
            bounds = (None, None)
        lower_limits.append(bounds[0])
        upper_limits.append(bounds[1])
    table['薪资下限'] = lower_limits
    table['薪资上限'] = upper_limits

    # Keep only the rows for which a salary range was recognised.
    table = table[table['薪资下限'].notna()]

    # The output name comes from the ``path`` argument, not from a global.
    out_dir = os.path.join(csv_folder_path, '处理后数据')
    os.makedirs(out_dir, exist_ok=True)
    table.to_csv(os.path.join(out_dir, path), index=False, encoding="utf_8_sig")


if __name__ == '__main__':
    # Script entry point: process every ``.csv`` file found directly inside
    # ``csv_folder_path``.  Guarded so that importing this module neither
    # scans the folder nor terminates the importing interpreter.
    #
    # NOTE(review): ``file`` is still bound at module level by this loop;
    # confirm nothing else reads that global before renaming it.
    path_dir = os.listdir(csv_folder_path)
    for file in path_dir:
        if not file.endswith('.csv'):
            continue
        print(file)
        process_data(file)

    # ``exit()`` is a helper installed by the ``site`` module for interactive
    # sessions; raising SystemExit ends the script the same way without it.
    raise SystemExit

